package com.wuxl.prada;

/**
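 * Description: Splits subtitle text into lower-cased words of at least four characters,
 * pairing each word with the start and end time of the subtitle it came from.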
 *
 * @author 诸葛小猿
 * @date 2024-12-19
 */
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Extracts individual words from parsed subtitles and tags each one with the
 * start and end time of the subtitle it appeared in.
 */
public class WordExtractor {
    /** Tokens shorter than this many characters are dropped. */
    private static final int MIN_WORD_LENGTH = 4;

    /** Compiled once so the regex is not recompiled for every subtitle. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Splits every subtitle's text on runs of whitespace and returns the tokens
     * that are at least {@value #MIN_WORD_LENGTH} characters long, lower-cased.
     *
     * <p>Tokens are split on whitespace only, so any punctuation attached to a
     * word stays part of the token (and counts towards its length).
     *
     * @param subtitles the subtitles to scan; {@code null} is treated as empty,
     *                  and {@code null} entries or entries with {@code null}
     *                  text are skipped
     * @return a new mutable list, in encounter order, with one entry per
     *         retained token; never {@code null}
     */
    public static List<WordTimestamp> extractWords(List<SrtParser.Subtitle> subtitles) {
        List<WordTimestamp> wordTimestamps = new ArrayList<>();
        if (subtitles == null) {
            return wordTimestamps;
        }
        for (SrtParser.Subtitle subtitle : subtitles) {
            if (subtitle == null) {
                continue;
            }
            String text = subtitle.getText();
            if (text == null) {
                continue;
            }
            for (String word : WHITESPACE.split(text)) {
                // Also discards the empty leading token produced when the text
                // starts with whitespace.
                if (word.length() < MIN_WORD_LENGTH) {
                    continue;
                }
                // Locale.ROOT keeps lower-casing independent of the JVM's
                // default locale (e.g. Turkish dotless-i mapping).
                wordTimestamps.add(new WordTimestamp(
                        word.toLowerCase(Locale.ROOT),
                        subtitle.getStartTime(),
                        subtitle.getEndTime()));
            }
        }
        return wordTimestamps;
    }

    /**
     * Immutable holder for a word together with the start and end time strings
     * of the subtitle it was taken from.
     */
    public static class WordTimestamp {
        private final String word;
        private final String startTime;
        private final String endTime;

        /**
         * Creates a new word/time triple; the values are stored as given.
         *
         * @param word      the word
         * @param startTime start time of the originating subtitle
         * @param endTime   end time of the originating subtitle
         */
        public WordTimestamp(String word, String startTime, String endTime) {
            this.word = word;
            this.startTime = startTime;
            this.endTime = endTime;
        }

        /**
         * @return the word passed to the constructor
         */
        public String getWord() {
            return word;
        }

        /**
         * @return the start time passed to the constructor
         */
        public String getStartTime() {
            return startTime;
        }

        /**
         * @return the end time passed to the constructor
         */
        public String getEndTime() {
            return endTime;
        }
    }
}
